# Epsilon-greedy bandit strategy: with probability epsilon explore a
# random arm, otherwise exploit the arm with the best empirical mean.
# store the empirical means in an array
means <- numeric(K)
# count how many times each arm was played (needed for the incremental
# mean update below)
ns <- numeric(K)
# keep making choices
while (TRUE) {
  if (runif(1) < epsilon) {
    # with probability epsilon, make a uniformly random choice
    x <- sample(1:K, size = 1)
  } else {
    # otherwise choose the option with the highest empirical mean
    x <- which.max(means)
  }
  # choose x and get a random reward
  reward <- play(x)
  # update the empirical mean of arm x incrementally:
  # new_mean = old_mean + (reward - old_mean) / n
  ns[x] <- ns[x] + 1
  means[x] <- means[x] + (reward - means[x]) / ns[x]
}
# store the empirical means
# UCB1 (upper confidence bound) bandit strategy.
# store the empirical means
means <- numeric(K)
# store how many times each option was played
ns <- numeric(K)
# keep track of how many games played in total
t <- 0
# first play one game for each possible option, so every arm has an
# initial empirical mean and a nonzero count
for (x in seq_len(K)) {
  reward <- play(x)
  ns[x] <- ns[x] + 1
  # incremental update of the empirical mean of arm x
  means[x] <- means[x] + (reward - means[x]) / ns[x]
  t <- t + 1
}
# now repeatedly choose the arm with the maximum upper confidence bound
while (TRUE) {
  # the exploration bonus sqrt(2 log t / n) shrinks as an arm is played
  # more often, balancing exploration against exploitation
  x <- which.max(means + sqrt(2 * log(t) / ns))
  reward <- play(x)
  ns[x] <- ns[x] + 1
  means[x] <- means[x] + (reward - means[x]) / ns[x]
  t <- t + 1
}
# store the empirical means in an array
# Softmax (Boltzmann) exploration: choose each arm with probability
# proportional to the exponential of its empirical mean.
means <- numeric(K)
# count how many times each arm was played (for the mean update)
ns <- numeric(K)
# keep making choices
while (TRUE) {
  # compute the normalising constant: the sum of exp of the means
  denominator <- sum(exp(means))
  # probability of choosing arm i is exp(mean_i) / denominator
  probabilities <- exp(means) / denominator
  # choose an arm according to that probability distribution
  x <- sample(1:K, prob = probabilities, size = 1)
  # play x and get a random reward
  reward <- play(x)
  # incremental update of the empirical mean of arm x
  ns[x] <- ns[x] + 1
  means[x] <- means[x] + (reward - means[x]) / ns[x]
}

\[ p_i = \frac{e^{\mu_i}}{\sum_{j=1}^K e^{\mu_j}} \]
# Gravitational-search-style update for one competitor ("me").
# peek at everyone's profits and normalise them to [0, 1] "masses"
masses <- (profits - min_profits) / (max_profits - min_profits)
# single out the top Z most profitable competitors.
# order() sorts ascending by default, so sort decreasing to pick the
# BEST Z, not the worst Z.
best <- order(masses, decreasing = TRUE)[1:Z]
# accelerate as if driven by "gravity" towards the best
acceleration <- acceleration +
  sum(g * masses[me] * masses[best] / distance(me, best))
acceleration <- acceleration / masses[me]
# move (with some friction: velocity is damped by a random factor)
velocity <- velocity * runif(1) + acceleration
position <- position + velocity
play(position)
# get pulled by your most profitable friend
# Particle-swarm-style update: velocity blends inertia, a pull towards
# the most profitable friend, and a pull towards one's best memory.
friend_velocity <- distance(friend, me)
# get pulled by your best memory.
# NOTE(review): the original computed distance(best_memory, friend);
# standard PSO pulls both terms towards one's OWN position, so this is
# measured from "me" — confirm against the surrounding text.
memory_velocity <- distance(best_memory, me)
# update velocity and position (the original referenced an undefined
# old_velocity; the previous velocity is the current value of velocity)
velocity <- velocity * inertia + alpha * friend_velocity +
  beta * memory_velocity + runif(1)
position <- position + velocity
play(position)
# with probability epsilon, explore
# Social imitation with random exploration.
# Braces are required: a multi-line if/else without them is a syntax
# error at the top level of an R script.
if (runif(1) < epsilon) {
  # with probability epsilon, shock your position by up to +/- delta
  position <- position + runif(1, min = -delta, max = delta)
} else if (profits[me] < profits[best_friend]) {
  # if a friend is doing better, move to where he is
  position <- positions[best_friend]
}
# otherwise just keep your current position
play(position)
Social Annealing